# Info:
# Script requires R="4.3.1" and ggplot2="3.4.3" to function properly (you can use for example r-lib rig to change your default R version: https://github.com/r-lib/rig)
# Replace "path/to/.../" with the according path to the input-file or output directory
# Feed supplementary tables 3 and 4 (the two .tsv files read below) to the script; they can be found here: https://osf.io/n5qj6/

# On Ubuntu you need to install the following libraries first: libgdal-dev & libfontconfig1-dev
# sudo apt install libgdal-dev
# sudo apt install libfontconfig1-dev

# Install the "remotes" package, which provides version-pinned installation.
# Packages are only installed when they are missing (or, for the pinned ones,
# present in a different version), so re-running the script does not download
# and reinstall everything again.
is_installed <- function(pkg) {
  # system.file() looks the package up on disk without loading its namespace,
  # so a package that still has to be replaced is not pulled into the session
  nzchar(system.file(package = pkg))
}

if (!is_installed("remotes")) {
  install.packages("remotes")
}

# Install necessary packages with specific versions
PINNED_PACKAGES <- c(
  ggplot2 = "3.4.3",
  ggnewscale = "0.4.10",
  maptools = "1.1-8",
  rgeos = "0.6-4"
)
for (pkg in names(PINNED_PACKAGES)) {
  pinned_version <- PINNED_PACKAGES[[pkg]]
  # package_version() normalises "1.1-8" style versions before comparing
  if (!is_installed(pkg) ||
      utils::packageVersion(pkg) != package_version(pinned_version)) {
    remotes::install_version(pkg, version = pinned_version)
  }
}

# Install necessary packages (any version)
UNPINNED_PACKAGES <- c(
  "dplyr", "cowplot", "ggmap", "ggpolypath", "gridExtra",
  "raster", "scales", "stringr", "svglite"
)
MISSING_PACKAGES <- UNPINNED_PACKAGES[
  !vapply(UNPINNED_PACKAGES, is_installed, logical(1))
]
if (length(MISSING_PACKAGES) > 0) {
  install.packages(MISSING_PACKAGES)
}

# Load libraries
library(ggplot2)
library(cowplot)
library(dplyr)
library(ggmap)
library(ggnewscale)
library(ggpolypath)
library(gridExtra)
library(maptools)
library(rgeos)
library(raster)
library(scales)
library(stringr)

################################################################################
# Fetch data for Landkreise
# If raster::getData is not working you can also load "geoData.RData" from https://osf.io/n5qj6/
DISTRICTS.shp <- raster::getData("GADM", country = "DEU", level = 2)
# Transform fetched Landkreis data into a data frame & keep only the Thuringian Landkreise
DISTRICTS_DF <- fortify(DISTRICTS.shp, region = "CC_2")
# The district keys are text. Thuringian keys are selected by their "16" prefix,
# which states the intent directly instead of relying on a lexicographic
# `>= "16000"` comparison of strings.
# NOTE(review): assumes the CC_2 keys are the zero-padded five-digit district
# keys, for which both selections agree - confirm against the GADM data.
DISTRICTS_DF_THURINGIA <- DISTRICTS_DF[startsWith(as.character(DISTRICTS_DF$id), "16"), ]

# Facet orderings shared by both merged data frames
MONTH_LEVELS <- c("Feb", "Mar", "Apr", "May", "Jun")
CLUSTER_LEVELS <- c(
  "S:H49Y", "S:V90F", "S:N185D", "S:N354K", "S:G496S",
  "S:N703D", "S:T716V", "S:S939F", "ORF1b:A520V"
)

# Read one tab-separated supplementary table. The district key is stored as
# text so that it has the same type as `id` in the polygon data frame it is
# joined to (no factor/character coercion during the join).
read_district_table <- function(path) {
  table_df <- read.csv(file = path, sep = "\t")
  table_df$id <- as.character(table_df[["id"]])
  table_df
}

# Attach the per-district values to the district polygons and add the ordered
# factors used for facetting. Every polygon vertex row is repeated for every
# table row of the same district, so the row fan-out of this join is expected.
merge_with_districts <- function(values_df) {
  merged_df <- full_join(DISTRICTS_DF_THURINGIA, values_df, by = "id")
  merged_df$month_factor <- factor(merged_df$month, levels = MONTH_LEVELS)
  merged_df$cluster_factor <- factor(merged_df$cluster, levels = CLUSTER_LEVELS)
  merged_df
}

# create & modify df
MOBILITY_DF <- read_district_table("/path/to/supplementary_table_4-mobility_data_per_thuringian_district_per_month.tsv")
SAMPLE_COUNT_DF <- read_district_table("/path/to/supplementary_table_3-accumulated_cluster_samples_per_thuringian_district_per_month.tsv")

# merge df with landkreis-data df; the *_FILTERED variants drop June
MOBILITY_MERGED_DF <- merge_with_districts(MOBILITY_DF)
MOBILITY_MERGED_DF_FILTERED <- filter(MOBILITY_MERGED_DF, month != "Jun")

SAMPLE_COUNT_MERGED_DF <- merge_with_districts(SAMPLE_COUNT_DF)
SAMPLE_COUNT_MERGED_DF_FILTERED <- filter(SAMPLE_COUNT_MERGED_DF, month != "Jun")

################################################################################
# Map of all clusters (facet rows) per month (facet columns), June excluded.
# Districts with an identified mutation (infected == 1) and districts without
# one (infected == 0) are drawn as two separate layers, so each layer gets its
# own fill scale (red vs. blue) for `infected_mobility_percentage`.
MOBILITY_PLOT <- local({
  # Horizontal colourbar with its title on top, used by both fill scales
  mobility_colourbar <- function(legend_title) {
    guide_colourbar(
      title = legend_title,
      title.position = "top",
      direction = "horizontal",
      barwidth = unit(7, "cm")
    )
  }

  # Red scale: districts with an identified mutation
  with_mutation_layer <- geom_polypath(
    data = filter(MOBILITY_MERGED_DF_FILTERED, infected == 1),
    mapping = aes(x = long, y = lat, group = group, fill = infected_mobility_percentage),
    color = "black",
    size = 0.15
  )
  with_mutation_scale <- scale_fill_gradientn(
    colours = c("#f7e4e7", "#B52C43", "#421018"),
    values = c(0, 0.5, 1),
    limits = c(0, 100),
    guide = mobility_colourbar("District with an identified mutation.\n\nColor intensity describes proportion of\nincoming mobility from districts with\nan identified mutation (red districts).")
  )

  # Blue scale: districts without an identified mutation
  without_mutation_layer <- geom_polypath(
    data = filter(MOBILITY_MERGED_DF_FILTERED, infected == 0),
    mapping = aes(x = long, y = lat, group = group, fill = infected_mobility_percentage),
    color = "black",
    size = 0.15
  )
  without_mutation_scale <- scale_fill_gradientn(
    colours = c("#b2b8d9", "#002D82", "#011345"),
    values = c(0, 0.5, 1),
    limits = c(0, 100),
    guide = mobility_colourbar("District without an identified mutation.\n\nColor intensity describes proportion of\nincoming mobility from districts with\nan identified mutation (red districts).")
  )

  # Bare map panels: no axes, no panel background, square facets
  map_theme <- theme(
    aspect.ratio = 1,
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.background = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(size = 15),
    legend.text = element_text(size = 10)
  )

  # new_scale("fill") has to sit between the two layer/scale pairs so that the
  # second fill scale does not replace the first one
  ggplot() +
    with_mutation_layer +
    with_mutation_scale +
    new_scale("fill") +
    without_mutation_layer +
    without_mutation_scale +
    facet_grid(rows = vars(cluster_factor), cols = vars(month_factor)) +
    map_theme
})
MOBILITY_PLOT

ggsave(
  filename = "thuringian_sc2_mutation_clusters_complete.svg",
  plot = MOBILITY_PLOT,
  path = "/path/to/output",
  width = 1200,
  height = 2800,
  units = "px",
  dpi = 300,
  scale = 3
)

# Map of all clusters (facet rows) per month (facet columns), June excluded,
# filled by the `infected` column of the sample-count table on a pseudo-log
# scale. Missing values are drawn in grey (`na.value`).
SAMPLE_COUNT_PLOT <- local({
  district_layer <- geom_polypath(
    data = SAMPLE_COUNT_MERGED_DF_FILTERED,
    mapping = aes(x = long, y = lat, group = group, fill = infected),
    color = "black",
    size = 0.15
  )

  # NOTE(review): `values` has 3 entries for 6 `colours`; the ggplot2 docs
  # describe `values` as one position per colour - confirm this is intended.
  isolate_count_scale <- scale_fill_gradientn(
    colours = c("#ede8ed", "#dbd1da", "#b19cb0", "#785171", "#4a3344", "#2e262c"),
    values = c(0, 0.5, 1),
    limits = c(1, 1000),
    breaks = c(1, 10, 100, 1000),
    trans = scales::pseudo_log_trans(sigma = 0.1),
    na.value = "#adadad",
    guide = guide_colourbar(
      title = "Accumulated number of identified isolates per district.",
      title.position = "top",
      direction = "horizontal",
      barwidth = unit(7, "cm")
    )
  )

  # Bare map panels: no axes, no panel background, square facets
  map_theme <- theme(
    aspect.ratio = 1,
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.background = element_blank(),
    legend.position = "bottom",
    legend.title.align = 1,
    legend.spacing.x = unit(1, "cm")
  )

  ggplot() +
    district_layer +
    isolate_count_scale +
    facet_grid(rows = vars(cluster_factor), cols = vars(month_factor)) +
    map_theme
})
SAMPLE_COUNT_PLOT

ggsave(
  filename = "thuringian_sc2_sample_count_complete.svg",
  plot = SAMPLE_COUNT_PLOT,
  path = "/path/to/output",
  width = 1200,
  height = 2800,
  units = "px",
  dpi = 300,
  scale = 3
)

################################################################################
# create plot from filtered df's for example clusters "S:N185D" & "S:S939F"
# Unlike the complete plots, the merged data frames are used here without the
# June filter.
MOBILITY_FILTERED_DF <- filter(MOBILITY_MERGED_DF, cluster %in% c("S:N185D", "S:S939F"))

# Mobility map: districts with (infected == 1) and without (infected == 0) an
# identified mutation are separate layers, each with its own fill scale.
MOBILITY_FILTERED_PLOT <- local({
  # Horizontal colourbar with its title on top, used by both fill scales
  mobility_colourbar <- function(legend_title) {
    guide_colourbar(
      title = legend_title,
      title.position = "top",
      direction = "horizontal",
      barwidth = unit(7, "cm")
    )
  }

  with_mutation_layer <- geom_polypath(
    data = filter(MOBILITY_FILTERED_DF, infected == 1),
    mapping = aes(x = long, y = lat, group = group, fill = infected_mobility_percentage),
    color = "#ffffff",
    size = 0.15
  )
  with_mutation_scale <- scale_fill_gradientn(
    colours = c("#f7e4e7", "#B52C43", "#421018"),
    values = c(0, 0.5, 1),
    limits = c(0, 100),
    guide = mobility_colourbar("District with an identified mutation.\n\nColor intensity describes proportion of\nincoming mobility from districts with\nan identified mutation (red districts).")
  )

  without_mutation_layer <- geom_polypath(
    data = filter(MOBILITY_FILTERED_DF, infected == 0),
    mapping = aes(x = long, y = lat, group = group, fill = infected_mobility_percentage),
    color = "#ffffff",
    size = 0.15
  )
  without_mutation_scale <- scale_fill_gradientn(
    colours = c("#b2b8d9", "#002D82", "#011345"),
    values = c(0, 0.5, 1),
    limits = c(0, 100),
    guide = mobility_colourbar("District without an identified mutation.\n\nColor intensity describes proportion of\nincoming mobility from districts with\nan identified mutation (red districts).")
  )

  map_theme <- theme(
    aspect.ratio = 1,
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.background = element_blank(),
    strip.background = element_blank(),
    legend.position = "bottom",
    legend.margin = margin(0, 0.9, 0, 0.9, "cm"),
    legend.title.align = 1,
    legend.spacing.x = unit(0.3, "cm"),
    legend.key.size = unit(0.3, "cm")
  )

  # new_scale("fill") has to sit between the two layer/scale pairs so that the
  # second fill scale does not replace the first one
  ggplot() +
    with_mutation_layer +
    with_mutation_scale +
    new_scale("fill") +
    without_mutation_layer +
    without_mutation_scale +
    facet_grid(rows = vars(cluster_factor), cols = vars(month_factor)) +
    map_theme
})
MOBILITY_FILTERED_PLOT

# Sample-count map for the same two clusters, pseudo-log fill scale
SAMPLE_COUNT_FILTERED_PLOT <- local({
  district_layer <- geom_polypath(
    data = filter(SAMPLE_COUNT_MERGED_DF, cluster %in% c("S:N185D", "S:S939F")),
    mapping = aes(x = long, y = lat, group = group, fill = infected),
    color = "#ffffff",
    size = 0.15
  )

  # NOTE(review): `values` has 3 entries for 6 `colours`; the ggplot2 docs
  # describe `values` as one position per colour - confirm this is intended.
  isolate_count_scale <- scale_fill_gradientn(
    colours = c("#ede8ed", "#dbd1da", "#b19cb0", "#785171", "#4a3344", "#2e262c"),
    values = c(0, 0.5, 1),
    limits = c(1, 1000),
    breaks = c(1, 10, 100, 1000),
    trans = scales::pseudo_log_trans(sigma = 0.1),
    na.value = "#adadad",
    guide = guide_colourbar(
      title = "Accumulated number of identified isolates per district.",
      title.position = "top",
      direction = "horizontal",
      barwidth = unit(7, "cm")
    )
  )

  map_theme <- theme(
    aspect.ratio = 1,
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.background = element_blank(),
    strip.background = element_blank(),
    legend.position = "bottom",
    legend.title.align = 1,
    legend.spacing.x = unit(1, "cm"),
    legend.key.size = unit(0.3, "cm")
  )

  ggplot() +
    district_layer +
    isolate_count_scale +
    facet_grid(rows = vars(cluster_factor), cols = vars(month_factor)) +
    map_theme
})
SAMPLE_COUNT_FILTERED_PLOT

# NOTE(review): the plots are stacked in one column (ncol = 1) but aligned with
# "h"; cowplot's examples use align = "v" for stacked plots - confirm intent.
COMBINED_FILTERED_PLOTS <- plot_grid(SAMPLE_COUNT_FILTERED_PLOT, MOBILITY_FILTERED_PLOT, ncol = 1, align = "h")
COMBINED_FILTERED_PLOTS

# The file names carry the cluster pair: the next section saves its plots under
# the un-suffixed "..._filtered_mobility.svg" / "..._filtered_sample-count.svg"
# names, which previously overwrote the files written here.
ggsave("thuringian_sc2_mutation_clusters_filtered_mobility_N185D_S939F.svg", plot = MOBILITY_FILTERED_PLOT, path = "/path/to/output", width = 800, height = 500, units = "px", dpi = 300, scale = 3)
ggsave("thuringian_sc2_mutation_clusters_filtered_sample-count_N185D_S939F.svg", plot = SAMPLE_COUNT_FILTERED_PLOT, path = "/path/to/output", width = 800, height = 500, units = "px", dpi = 300, scale = 3)
#ggsave("thuringian_sc2_mutation_clusters_filtered_combined_N185D_S939F.svg", plot = COMBINED_FILTERED_PLOTS, path = "/path/to/output", width = 1200, height = 1500, units = "px", dpi = 300, scale = 3)


################################################################################
# create plot from filtered df's for example clusters "ORF1b:A520V" & "S:S939F"
# Unlike the complete plots, the merged data frames are used here without the
# June filter.
MOBILITY_FILTERED_DF <- filter(MOBILITY_MERGED_DF, cluster %in% c("ORF1b:A520V", "S:S939F"))

# Mobility map: districts with (infected == 1) and without (infected == 0) an
# identified mutation are separate layers, each with its own fill scale.
MOBILITY_FILTERED_PLOT <- local({
  # Horizontal colourbar with its title on top, used by both fill scales
  mobility_colourbar <- function(legend_title) {
    guide_colourbar(
      title = legend_title,
      title.position = "top",
      direction = "horizontal",
      barwidth = unit(7, "cm")
    )
  }

  with_mutation_layer <- geom_polypath(
    data = filter(MOBILITY_FILTERED_DF, infected == 1),
    mapping = aes(x = long, y = lat, group = group, fill = infected_mobility_percentage),
    color = "#ffffff",
    size = 0.15
  )
  with_mutation_scale <- scale_fill_gradientn(
    colours = c("#f7e4e7", "#B52C43", "#421018"),
    values = c(0, 0.5, 1),
    limits = c(0, 100),
    guide = mobility_colourbar("District with an identified mutation.\n\nColor intensity describes proportion of\nincoming mobility from districts with\nan identified mutation (red districts).")
  )

  without_mutation_layer <- geom_polypath(
    data = filter(MOBILITY_FILTERED_DF, infected == 0),
    mapping = aes(x = long, y = lat, group = group, fill = infected_mobility_percentage),
    color = "#ffffff",
    size = 0.15
  )
  without_mutation_scale <- scale_fill_gradientn(
    colours = c("#b2b8d9", "#002D82", "#011345"),
    values = c(0, 0.5, 1),
    limits = c(0, 100),
    guide = mobility_colourbar("District without an identified mutation.\n\nColor intensity describes proportion of\nincoming mobility from districts with\nan identified mutation (red districts).")
  )

  map_theme <- theme(
    aspect.ratio = 1,
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.background = element_blank(),
    strip.background = element_blank(),
    legend.position = "bottom",
    legend.margin = margin(0, 0.9, 0, 0.9, "cm"),
    legend.title.align = 1,
    legend.spacing.x = unit(0.3, "cm"),
    legend.key.size = unit(0.3, "cm")
  )

  # new_scale("fill") has to sit between the two layer/scale pairs so that the
  # second fill scale does not replace the first one
  ggplot() +
    with_mutation_layer +
    with_mutation_scale +
    new_scale("fill") +
    without_mutation_layer +
    without_mutation_scale +
    facet_grid(rows = vars(cluster_factor), cols = vars(month_factor)) +
    map_theme
})
MOBILITY_FILTERED_PLOT

# Sample-count map for the same two clusters, pseudo-log fill scale
SAMPLE_COUNT_FILTERED_PLOT <- local({
  district_layer <- geom_polypath(
    data = filter(SAMPLE_COUNT_MERGED_DF, cluster %in% c("ORF1b:A520V", "S:S939F")),
    mapping = aes(x = long, y = lat, group = group, fill = infected),
    color = "#ffffff",
    size = 0.15
  )

  # NOTE(review): `values` has 3 entries for 6 `colours`; the ggplot2 docs
  # describe `values` as one position per colour - confirm this is intended.
  isolate_count_scale <- scale_fill_gradientn(
    colours = c("#ede8ed", "#dbd1da", "#b19cb0", "#785171", "#4a3344", "#2e262c"),
    values = c(0, 0.5, 1),
    limits = c(1, 1000),
    breaks = c(1, 10, 100, 1000),
    trans = scales::pseudo_log_trans(sigma = 0.1),
    na.value = "#adadad",
    guide = guide_colourbar(
      title = "Accumulated number of identified isolates per district.",
      title.position = "top",
      direction = "horizontal",
      barwidth = unit(7, "cm")
    )
  )

  map_theme <- theme(
    aspect.ratio = 1,
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.background = element_blank(),
    strip.background = element_blank(),
    legend.position = "bottom",
    legend.title.align = 1,
    legend.spacing.x = unit(1, "cm"),
    legend.key.size = unit(0.3, "cm")
  )

  ggplot() +
    district_layer +
    isolate_count_scale +
    facet_grid(rows = vars(cluster_factor), cols = vars(month_factor)) +
    map_theme
})
SAMPLE_COUNT_FILTERED_PLOT

# NOTE(review): the plots are stacked in one column (ncol = 1) but aligned with
# "h"; cowplot's examples use align = "v" for stacked plots - confirm intent.
COMBINED_FILTERED_PLOTS <- plot_grid(SAMPLE_COUNT_FILTERED_PLOT, MOBILITY_FILTERED_PLOT, ncol = 1, align = "h")
COMBINED_FILTERED_PLOTS

# Write the two single maps and the stacked figure as SVG
ggsave(
  filename = "thuringian_sc2_mutation_clusters_filtered_mobility.svg",
  plot = MOBILITY_FILTERED_PLOT,
  path = "/path/to/output",
  width = 800, height = 500, units = "px", dpi = 300, scale = 3
)
ggsave(
  filename = "thuringian_sc2_mutation_clusters_filtered_sample-count.svg",
  plot = SAMPLE_COUNT_FILTERED_PLOT,
  path = "/path/to/output",
  width = 800, height = 500, units = "px", dpi = 300, scale = 3
)
ggsave(
  filename = "thuringian_sc2_mutation_clusters_filtered_combined.svg",
  plot = COMBINED_FILTERED_PLOTS,
  path = "/path/to/output",
  width = 1200, height = 1500, units = "px", dpi = 300, scale = 3
)